rm(list=ls(all=TRUE))
require(plyr)
require(tidyverse)
#install.packages("dplyr")
library(dplyr)
library(ggridges)
## Good cb-friendly palette
library(ggokabeito)
## Fonts aren't necessary to reproduce.
## Comment out and change the default theme as needed.
library(myriad)
library(showtext)
library(readxl)
require(sandwich)
require(lmtest)
require(foreign)
require(reshape2)
require(data.table)
library(quantreg)
library(geomtextpath)
require(stringr)

setwd("C:/Users/zelizer/Dropbox/priors/Replication File/")

ggtheme_personalized <- theme(panel.background = element_blank(),
                              panel.grid = element_blank(),
                              axis.ticks = element_blank(),
                              axis.title.x = element_text(color = "black" , size = 12, face="bold"),
                              axis.title.y = element_text(color = "black" , size = 12, face="bold"),
                              axis.text.y = element_text(size = 8, colour = "black"),
                              axis.text.x = element_text(size = 8, colour = "black"),
                              panel.border = element_blank(),
                              panel.spacing = unit(0.5, "cm"),
                              legend.key = element_rect(colour = "white",fill = "white"),
                              legend.text = element_text(size=10,color="black"),
                              legend.title = element_text(size = 11, face = "bold", colour = "black"),
                              strip.text.x = element_text(size = 11, face = "bold", colour = "black"),
                              strip.background = element_rect(colour = "white", fill = "white"),
                              legend.position="none",
                              plot.margin = unit(c(0.5,0.5,0.5,0.5), "cm"))

#library(showtext)
# Once it's installed, import the fonts as before:
library(extrafont)
# fonts()
# remotes::install_version("Rttf2pt1", version = "1.3.8")
# extrafont::font_import(path = "C:/Users/Zelizer/Downloads")
# extrafont::font_import()
loadfonts(device = "win")

#showtext_auto()
#import_myriad_semi()
theme_figs <- function(){
#  theme_myriad_semi(base_family = windowsFonts()[4]) +
    theme_myriad_semi(base_family = fonts()[1],
                      subtitle_family = fonts()[1],
                      caption_family = fonts()[1],) +
    theme(
      plot.background = element_rect(color = "white"),
      plot.title = element_text(size = rel(1.5)),
      plot.subtitle = element_text(size = rel(1.2)),
      axis.title.x = element_text(size = rel(1.25)),
      axis.title.y = element_text(size = rel(1.25)),
      #      axis.text.x = element_text(size = rel(1.2)),
      axis.text.y.left = element_text(size = rel(1.2)),
      axis.text.y.right = element_text(size = rel(0.9)))
      #text=element_text(family=windowsFonts()[4]))
}
# theme_set(theme_minimal())
theme_set(theme_figs())
## Colors for manual use as needed
my_oka <- palette_okabe_ito(order = c(1, 2, 3, 5, 7),
                            alpha = NULL)

###########################################################################
####      -2. Import notes and disposition for firscal review cmte     ####
###########################################################################

df0 <- read.csv("2019_fiscalreview_cmte_bills.csv" , stringsAsFactors = FALSE)
head(df0)
dim(subset(df0 , !is.na(cost)))
df0 <- subset(df0 , !is.na(cost))
df0$enacted <- ifelse(df0$Last.Action=="Delivered to Secretary of State (G)" ,
                      "Enacted" , "Failed")
table(df0$enacted)

###########################################################################
####      -1. Import survey results for legislators      #########
###########################################################################

### Use this data to create anonyimized data frames (original, non-anonymized data not included
### in replication file)

# data <- read.csv(file = "MO_outcomes.csv" , stringsAsFactors = FALSE) %>%
#   mutate(estimate = as.numeric(str_replace_all(estimate,",","")) / 1000000)
# table(data$estimate)
# ### 11 obs are outside bounds; change to bounds of range
# data$estimate[!is.na(data$estimate) & data$estimate > 10] <- rep(10 , 9)
# data$estimate[!is.na(data$estimate) & data$estimate < -10] <- rep(-10 , 2)
# 
# 
# 
# billnums <- c("HB 26","HB 37","HB 43","HB 299","HB 312","HB 333","HB 404",
#     "HB 547","HB 585","HB 715")
# 
# real_costs <- data.frame(bill = 1:10,
#                          cost = c(1.7175 , -1.028 , 0.254 , 1.842 ,
#                                   5.105 , 0.074 , 9.63 , 0.177 , -1.431 ,
#                                   0.309),
#                          billnums) %>%
#               mutate(. , bill_factor = factor(billnums)) %>%
#               mutate(. , bill_factor = factor(bill_factor ,
#                                               levels = levels(bill_factor)[c(1,5,7,2,3,4,6,8,9,10)])) %>%
#   #              levels = levels(bill_factor)[c(1,3,4,5,6,7,8,9,10,2)])) %>%
#   mutate(. , Party.sponsor = c("Republican","Republican","Democrat","Republican","Democrat",
#                                            "Republican","Republican","Republican","Republican","Republican"))
# 
# 
# data <- join(data , real_costs) %>%
#   mutate(. , error = estimate - cost)
# 
# leg_info <- read.csv(file = "MO_legislator_information.csv" , stringsAsFactors = FALSE)
# 
# 
# 
# data <- join(data , leg_info[,c("leg_name","Party","acu","YearEnter","Gender","Subject","chair","declined","never")]) %>%
# mutate(. , Party2 = ifelse(Party == Party.sponsor , 'Same Party' , 'Opposite Party'))
# 
# # leg_info <- join(leg_info[,c("leg_name","Party","acu","YearEnter","Gender","Subject","chair","declined","never")] , data) %>%
# #   mutate(. , Party2 = ifelse(Party == Party.sponsor , 'Same Party' , 'Opposite Party'))
# 
# data$leg_name <- paste(as.numeric(factor(data$leg_name)))
# save(data , file = "anonymized_data.RData")
# 
# leg_info <- subset(leg_info , select = c("Party","YearEnter","Gender","Subject","acu","chair","declined","never"))
# save(leg_info , file = "anonymized_leg_data.RData")


###########################################################################
##################  -1. Review Participants  ##############################
###########################################################################

load("anonymized_data.Rdata")
load("anonymized_leg_data.Rdata")

ddply(leg_info , .(Subject) , function(x) out = data.frame(
  N = nrow(x),
  Terms = mean(ceiling((2019 - x$YearEnter)/2)),
  Freshman = mean(x$YearEnter==2018),
  Gender = mean(x$Gender=="F"),
  Party = mean(x$Party=="Republican"),
  Chair = mean(x$chair)
))

ddply(leg_info , .(Subject) , function(x) out = data.frame(
  N = nrow(x),
  Terms = mean(ceiling((2019 - x$YearEnter)/2)),
  Freshman = mean(x$YearEnter==2018),
  Gender = mean(x$Gender=="F"),
  Party = mean(x$Party=="Republican"),
  Chair = mean(x$chair)
))

t.test(as.numeric(YearEnter==2018) ~ declined , subset(leg_info , never==0))
t.test(as.numeric(Gender=="M") ~ declined , subset(leg_info , never==0))
t.test(as.numeric(Party=="Republican") ~ declined , subset(leg_info , never==0))
t.test(chair ~ declined , subset(leg_info , never==0))
t.test(acu ~ declined , subset(leg_info , never==0 & Party=="Democrat"))
t.test(acu ~ declined , subset(leg_info , never==0 & Party=="Republican"))

t.test(as.numeric(YearEnter==2018) ~ Subject , leg_info)
t.test(as.numeric(Gender=="M") ~ Subject , leg_info)
t.test(as.numeric(Party=="Republican") ~ Subject , leg_info)
t.test(chair ~ Subject , leg_info)
t.test(acu ~ Subject , subset(leg_info , Party=="Democrat"))
t.test(acu ~ Subject , subset(leg_info , Party=="Republican"))

###########################################################################
######################  0. Fiscal Review  #################################
###########################################################################


pdf(
  "fiscal_review_bills.pdf",
  height = 4,
  width = 6,
  onefile = FALSE
)
subset(df0 , cost!=0) %>%
  ggplot(data =  ., aes(x = cost)) +
  geom_histogram(color = "grey30" , fill = "grey30" ,
                 alpha = 0.2 ,
                 bins = 20) +
  scale_x_continuous(breaks = c(-20, -10 , 0 , 10 , 20),
                     limits = c(-25.5 , 25.5)) +
  labs(x = "Bill Projected Cost (-) or Benefit (+) (in $M)" ,
       y = "Number of Bills") +
  facet_wrap(as.factor(subset(df0 , cost!=0)$enacted))
dev.off()

subset(df0 , cost!=0) %>%
  with(. , mean(abs(cost) <= 10))
### 90% of bills with a forecasted cost or benefit fall within our range
subset(df0 , cost!=0) %>%
  with(. , sum(cost < 0 & cost >= -1))
subset(df0 , cost!=0) %>%
  with(. , sum(cost < 0 & cost >= -.1))

subset(df0 , cost!=0) %>%
  with(. , mean(enacted=="Enacted"))

subset(df0 , cost!=0 & enacted=="Enacted") %>%
  with(. , mean(abs(cost) <= 10))
### 93% of bills with a forecasted cost or benefit fall within our range

subset(df0 , cost!=0) %>%
  dim()

subset(df0 , cost!=0 & cost < 0 & cost > -.074) %>%
  with(. , Bill)

subset(df0 , cost!=0 & cost < 0 & cost > -.074) %>%
  with(. , sum(enacted=="Enacted"))

subset(df0 , cost!=0 & enacted=="Failed") %>%
  with(. , sum(cost))

subset(df0 , cost!=0 & enacted=="Failed") %>%
  dim(.)




###########################################################################
######################  0. Prior beliefs  #################################
###########################################################################



temp_data <- data %>%
  # join(. , leg_info , type = "left") %>%
  subset(. , t %in% c("Low")) %>%
  mutate(. , Date = factor(
    substr(interview.date , 1 , 1) ,
    levels = c("2","3","4"),
    labels = c("February","March","April")
  )) %>%
  mutate(. , Tenure = ifelse(YearEnter == 2018 , 'First Term' , 'Incumbent'))
temp_data <- temp_data[,c(1:ncol(temp_data))[colnames(temp_data)!="billnums"]]

t.test(temp_data$error)
ddply(temp_data , .(bill) , function(x) mean(x$error))
with(temp_data , quantile(error , c(0.25 , 0.5 , 0.75)))
mean(temp_data$error > 0)

my_oka1 <- rep(my_oka , 2)
  #showtext_auto()
loadfonts(device="postscript")
pdf(
  "beliefs1.pdf",
  height = 4,
  width = 8,
  onefile = FALSE,
  family = 'Myriad Pro'
)
temp_data %>%
  ggplot() +
  geom_histogram(data = temp_data ,
                 mapping = aes(x = error, y = ..density..*2),
                 stat = "bin",
                 bins = 20, size = 0.5,
                 alpha = 0.7) +
  geom_vline(xintercept = 0 , linetype = 1 , color = "gray30") +
  geom_step(mapping = aes(x = error, y = ..density..*2),
            bins = 20, alpha = 0.9,
            color = "gray30", size = 0.6,
            stat = "bin",
            direction = "mid") +
  geom_textvline(xintercept = -.606, label = "25th Percentile = -0.6", hjust = 0.8,
                 linetype = 2, vjust = -0.3, color = my_oka1[1], fontface = "bold") +
  geom_textvline(xintercept = 1.028, label = "50th Percentile =  1.0", hjust = 0.8,
                 linetype = 2, vjust = 1.3, color = my_oka1[3], fontface = "bold") +
  geom_textvline(xintercept = 4.691, label = "75th Percentile =  4.7", hjust = 0.8,
                 linetype = 2, vjust = 1.3, color = my_oka1[5], fontface = "bold") +
  # geom_vline(data = tempdata ,
  #            aes(xintercept = cost ), color = "red" , linetype="dashed") +
  #  scale_fill_manual(values = alpha(my_oka1, 0.8)) +
  #  scale_color_manual(values = alpha(my_oka1, 1)) +
  lims(x = c(-22 , 22))  +
  guides(color = "none", fill = "none") +
  labs(x = "Error of Forecast (in $M)", y = "Density")
dev.off()


temp_data %>%
  with(. , mean(abs(error)>5))
temp_data %>%
  with(. , mean(sign(estimate)==sign(cost)))
temp_data %>%
  with(. , cor(estimate,cost))
temp_data %>%
  with(. , mean(error))
temp_data %>%
  with(. , mean(error>0))


summary(m <- lm(error ~ Party + Party.sponsor + as.numeric(Party==Party.sponsor)+
                  as.numeric(2019 - YearEnter) + as.factor(Gender=="F"), data = temp_data))
coeftest(m, vcov = vcovHC(m, type="HC1"))

temp_data$pred_estimate <- temp_data$estimate
temp_data$pred_estimate[temp_data$Party2=="Opposite Party"] <-
  temp_data$pred_estimate[temp_data$Party2=="Opposite Party"] - 2.7348217

with(temp_data , mean(estimate - cost))
with(temp_data , mean(pred_estimate - cost))
0.9799778 / 1.389101

###########################################################################
######################  1. Posterior beliefs  #############################
###########################################################################
temp_data <- data %>%
  subset(. , t %in% c("Low","High")) %>%
  mutate(. , Date = factor(
    substr(interview.date , 1 , 1) ,
    levels = c("2","3","4"),
    labels = c("February","March","April")
  )) %>%
  mutate(. , Tenure = ifelse(YearEnter == 2018 , 'First Term' , 'Incumbent'))
temp_data <- temp_data[,c(1:ncol(temp_data))[colnames(temp_data)!="billnums"]]


subset(temp_data , t=="High") %>% with(. , mean(error))
subset(temp_data , t=="High" & error > 0) %>% with(. , mean(error))
subset(temp_data , t=="High" & error < 0) %>% with(. , mean(error))
ddply(temp_data , .(bill) , function(x) mean(x$error))
subset(temp_data , t=="High") %>% with(. , quantile(error , c(0.25 , 0.5 , 0.75)))



pdf(
    "beliefs_posterior1.pdf",
    height = 4,
    width = 8,
    onefile = FALSE,
    family = 'Myriad Pro'
  )
temp_data %>%
  subset(. , t=="High") %>%
  ggplot() +
  geom_histogram(mapping = aes(x = error, y = ..density..*2),
                 stat = "bin",
                 bins = 20, size = 0.5,
                 alpha = 0.7) +
  geom_vline(xintercept = 0 , linetype = 1 , color = "gray30") +
  geom_step(mapping = aes(x = error, y = ..density..*2),
            bins = 20, alpha = 0.9,
            color = "gray30", size = 0.6,
            stat = "bin",
            direction = "mid") +
  geom_textvline(xintercept = -.132, label = "25th Percentile = -0.1", hjust = 0.8,
                 linetype = 2, vjust = -0.3, color = my_oka1[1], fontface = "bold") +
  geom_textvline(xintercept = 0.37, label = "50th Percentile =  0.4", hjust = 0.8,
                 linetype = 2, vjust = 1.3, color = my_oka1[3], fontface = "bold") +
  geom_textvline(xintercept = 2.053, label = "75th Percentile =  2.1", hjust = 0.8,
                 linetype = 2, vjust = 1.3, color = my_oka1[5], fontface = "bold") +
  # geom_vline(data = tempdata ,
  #            aes(xintercept = cost ), color = "red" , linetype="dashed") +
  #  scale_fill_manual(values = alpha(my_oka1, 0.8)) +
  #  scale_color_manual(values = alpha(my_oka1, 1)) +
  lims(x = c(-22 , 22))  +
  guides(color = "none", fill = "none") +
  labs(x = "Error of Forecast (in $M)", y = "Density")
dev.off()


summary(lm(error ~ as.factor(t=="High"), data = temp_data))
m1 <- lm(error ~ as.factor(bill)+ as.factor(leg) + as.factor(t=="High"), data = temp_data)
coeftest(m1, vcov = vcovHC(m1, type="HC1"))

rq(error ~ as.factor(t=="High"), data = temp_data, tau = c(0.5)) %>%
  summary(. , se = "boot")


summary(lm(abs(error) ~ as.factor(t=="High") , data = temp_data))
m1 <- lm(abs(error) ~ as.factor(bill)+ as.factor(leg) +as.factor(t=="High"), data = temp_data)
coeftest(m1, vcov = vcovHC(m1, type="HC1"))

m1 <- lm(as.numeric(error>0) ~ as.factor(bill)+as.factor(leg) + as.factor(t=="High"),
         data = temp_data)
coeftest(m1, vcov = vcovHC(m1, type="HC1"))
m1 <- lm(as.numeric(error>0)*100 ~ as.factor(t=="High"), data = temp_data)
coeftest(m1, vcov = vcovHC(m1, type="HC1"))


### Correlates of bias

m <- mutate(temp_data , t = as.numeric(t=="High")) %>%
  lm(error ~ Party*t + Party.sponsor*t + as.numeric(Party==Party.sponsor)*t+
       as.numeric(2019 - YearEnter)*t + as.factor(Gender=="F")*t, data = .)
coeftest(m, vcov = vcovHC(m, type="HC1"))

###########################################################################
######################       2. Support       #############################
###########################################################################
temp_data <- subset(data , t!="Control")

m2 <- lm(as.numeric(support=="Yes")*100 ~ as.factor(t=="High") * cost, data = temp_data)
coeftest(m2, vcov = vcovHC(m2, type="HC1"))

### Table F1
m2 <- lm(as.numeric(support=="Yes")*100 ~ as.factor(t=="High") * estimate +
           as.factor(t=="High") *cost, data = temp_data)
summary(m2)
coeftest(m2, vcov = vcovHC(m2, type="HC1"))



############# VOTING
temp_data <- subset(data , t!="Control" & !is.na(vote) & vote!=9)
dim(temp_data)
m2 <- lm(as.numeric(vote==1)*100 ~ as.factor(t=="High") * cost, data = temp_data)
coeftest(m2, vcov = vcovHC(m2, type="HC1"))
table(temp_data$t)


# RI for treatment effects on voting
nsims <- 10000
sim_out <- rep(NA , nsims)

tempdf <- subset(temp_data , select = c("bill","cost")) %>% unique()
set.seed(123456)
for(i in 1:nsims){
  tempdf2 <- mutate(tempdf , cost2 = sample(tempdf$cost , 6 , replace = FALSE))
  temp_data2 <- join(temp_data , tempdf2 , by = c("bill","cost"))

  sim_out[i] <- lm(as.numeric(vote==1)*100 ~ as.factor(t=="High") * cost2, data = temp_data2)$coef["cost2"]
}
hist(sim_out)
sd(sim_out)
mean(sim_out <= -4.59557)
length(unique(sim_out))
### great - this has brute forced the 720 possilbe permutations
mean(unique(sim_out) <= -4.59557)
sum(unique(sim_out) <= -4.59557)






### robustness checks for Table 6
temp_data <- subset(data , t!="Control" & !is.na(vote) & vote!=9)
m2 <- lm(as.numeric(support=="Yes")*100 ~ as.factor(t=="High") * cost, data = temp_data)
coeftest(m2, vcov = vcovHC(m2, type="HC1"))


temp_data <- subset(data , t!="Low" & !is.na(vote) & vote!=9)
m2 <- lm(as.numeric(vote==1)*100 ~ as.factor(t=="High"), data = temp_data)
coeftest(m2, vcov = vcovHC(m2, type="HC1"))

m2 <- lm(as.numeric(vote==1)*100 ~ as.factor(t=="High") * cost, data = temp_data)
summary(m2)
coeftest(m2, vcov = vcovHC(m2, type="HC1"))

